﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.IO;

namespace SpiderServer
{
	/// <summary>
	/// An <see cref="Sgml.SgmlReader"/> whose document type is preset to "HTML" and which
	/// hides elements that carry a name prefix (for example <c>&lt;o:p&gt;</c>), so that
	/// the resulting XML does not require any namespace declarations.
	/// </summary>
	public class HtmlReader : Sgml.SgmlReader
	{
		/// <summary>
		/// Creates a reader that parses HTML from the supplied text reader.
		/// </summary>
		/// <param name="reader">Source of the HTML text; assigned to <c>InputStream</c>.</param>
		public HtmlReader(TextReader reader)
			: base()
		{
			base.InputStream = reader;
			base.DocType = "HTML";
		}

		/// <summary>
		/// Creates a reader that parses HTML from an in-memory string.
		/// </summary>
		/// <param name="content">The HTML text to parse; wrapped in a <see cref="StringReader"/>.</param>
		public HtmlReader(string content)
			: this(new StringReader(content))
		{
		}

		/// <summary>
		/// Advances to the next node, skipping over any element whose name contains a
		/// prefix separator (<c>':'</c>) after its first character.
		/// </summary>
		/// <returns>
		/// <c>true</c> if the reader is positioned on a node after the call;
		/// <c>false</c> if the end of the input was reached.
		/// </returns>
		public override bool Read()
		{
			bool status = base.Read();

			// A prefixed element (such as "<o:p>") is skipped entirely so that the
			// output stays free of namespaces. Skip() leaves the reader on whatever
			// follows the skipped element, which may be another prefixed element or
			// the end of the input, so the position is re-checked after every skip
			// instead of reporting the earlier successful read.
			while (status
				&& base.NodeType == XmlNodeType.Element
				&& base.Name.IndexOf(':') > 0)
			{
				base.Skip();
				status = !base.EOF;
			}

			return status;
		}
	}
}
